# ------------------------------------- #
# Replication code for:
#
# Rathbun, Brian, Christopher Sebastian Parker, and Caleb Pomeroy "Separate but Unequal: Ethnocentrism and Racialization 
# Explain the 'Democratic' Peace in Public Opinion," American Political Science Review.
#
# This script reproduces the word embedding analyses presented in the main text and appendices.
# ------------------------------------- #

# --- libraries --- #
library(lsa)
library(ggplot2)
library(conText)
library(texreg) # createTexreg() / screenreg() used for tables B14-B16
# Seed the RNG once up front; the bootstraps further down re-seed with 123 before each run.
set.seed(1912)

# --- set working directory
# NOTE(review): this path is specific to one machine; point it at the folder where the
# replication archive was unpacked so that the relative "data/..." paths below resolve.
setwd("~/Downloads/replication_files/")

# --- load data
# Embedding objects stored as .rds files. The code below indexes them by word (row) name
# and averages over columns -- presumably word-by-dimension matrices, TODO confirm.
glove_vectors <- readRDS("data/glove_200d_vectors.rds")
glove_vectors_300d <- readRDS("data/glove_300d_vectors.rds")
uk_vectors <- readRDS("data/uk_elite_vectors.rds")

# --- embedding analysis: cosine estimates inspired by Acharya et al's framework --- #
# here, we estimate (1) the "total" cosine similarity between democracy and peace,
# (2) the same cosine similarity with democracy averaged with (or "exposed" to) racial terms, and
# (3) the cosine similarity eliminated by those racial terms.
# to be clear, even though we draw inspiration from Acharya et al (2018), this analysis is entirely correlational.

# --- define dictionaries
# Word lists that the analyses below resample with replacement.
democ_words <- c(
  "democracy", "democratic", "democratically",
  "elect", "elections", "elected"
)
white_words <- c("white", "western", "caucasian", "european")
nonwhite_words <- c("non-white", "non-western", "non-caucasian", "non-european")

# Fixed concept vectors: the column means of the GloVe rows for each word list.
peace_vector <- colMeans(
  glove_vectors[c("peace", "harmony", "agreement", "diplomacy"), ]
)
war_vector <- colMeans(
  glove_vectors[c("war", "disharmony", "disagreement", "hostility"), ]
)
dyadic_vector <- colMeans(
  glove_vectors[c("our", "us", "we", "their", "them", "they"), ]
)

# --- cosine sims between racial terms and democracy
# Average vector of the democracy dictionary, shared by both comparisons below.
democ_centroid <- colMeans(glove_vectors[democ_words, ])
# white terms vs. democracy terms: cosine sim = 0.389, as mentioned in the paper's text
cosine(colMeans(glove_vectors[white_words, ]), democ_centroid)
# nonwhite terms vs. democracy terms: cosine sim = -0.003, as mentioned in the paper's text
cosine(colMeans(glove_vectors[nonwhite_words, ]), democ_centroid)

# --- overall cosine similarity (akin to "total effect" of democracy on peace)
# Bootstrap over the democracy dictionary: each of the 1000 replicates resamples the
# democracy words with replacement, averages their vectors, and takes the cosine
# similarity between that average and the fixed peace vector.
set.seed(123)
dem_total_effect_resample <- replicate(1000, {
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  cosine(colMeans(glove_vectors[democ_draw, ]), peace_vector)
})
# point estimate: mean over replicates, akin to the "total effect" i.e. ATE
dem_total_effect <- mean(dem_total_effect_resample)
# percentile intervals taken from the bootstrap distribution
total_ci_95 <- quantile(dem_total_effect_resample, probs = c(0.025, 0.975)) # 95% CI
total_ci_90 <- quantile(dem_total_effect_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine sim "fixing" nonwhite (akin to direct effect or "exposing" the democ vector to nonwhite)
# Each replicate resamples the democracy and the nonwhite dictionaries with replacement,
# averages the two dictionary means element-wise, and takes the cosine similarity of that
# "exposed" democracy vector with the peace vector.
set.seed(123)
dem_acde_nonwhite_resample <- replicate(1000, {
  # democracy words are drawn before nonwhite words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  nonwhite_draw <- sample(nonwhite_words, length(nonwhite_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                   colMeans(glove_vectors[nonwhite_draw, ])))
  cosine(exposed_vector, peace_vector)
})
# point estimate, akin to the "direct effect" of democracy fixing nonwhite
dem_acde_nonwhite <- mean(dem_acde_nonwhite_resample)
acde_ci_95 <- quantile(dem_acde_nonwhite_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_nonwhite_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by nonwhite terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
# NOTE(review): both bootstraps start from seed 123, but this one draws two samples per
# replicate and the total bootstrap draws one, so replicate i of each does not use the
# same democracy resample -- confirm that this unpaired difference is intended.
eliminated_nonwhite_resample <- dem_total_effect_resample - dem_acde_nonwhite_resample
dem_nonwhite_eliminated <- mean(eliminated_nonwhite_resample)
eliminated_ci_95 <- quantile(eliminated_nonwhite_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_nonwhite_resample, probs = c(0.05, 0.95))

# share of cosine(dem, peace) eliminated by nonwhite terms: about 40%, as mentioned in the paper's text
dem_nonwhite_eliminated / dem_total_effect

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_nonwhite <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_nonwhite, dem_nonwhite_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with Nonwhite Vector", "Averaged with Nonwhite Vector"),
    levels = c("Unexposed to Race", "Averaged with Nonwhite Vector")
  )
)

# --- cosine sim "fixing" white (akin to direct effect or "exposing" the democ vector to white)
# Each replicate resamples the democracy and the white dictionaries with replacement,
# averages the two dictionary means element-wise, and takes the cosine similarity of that
# "exposed" democracy vector with the peace vector.
set.seed(123)
dem_acde_white_resample <- replicate(1000, {
  # democracy words are drawn before white words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  white_draw <- sample(white_words, length(white_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                   colMeans(glove_vectors[white_draw, ])))
  cosine(exposed_vector, peace_vector)
})
# point estimate, akin to the "direct effect" of democracy fixing white
dem_acde_white <- mean(dem_acde_white_resample)
# the acde_ci_* and eliminated_ci_* names are reused (overwritten) for the white estimates
acde_ci_95 <- quantile(dem_acde_white_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_white_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by white terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_white_resample <- dem_total_effect_resample - dem_acde_white_resample
dem_white_eliminated <- mean(eliminated_white_resample)
eliminated_ci_95 <- quantile(eliminated_white_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_white_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_white <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_white, dem_white_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with White Vector", "Averaged with White Vector"),
    levels = c("Unexposed to Race", "Averaged with White Vector")
  )
)

# Stack nonwhite and white results; the "Unexposed to Race" total row appears in both.
plot_df <- rbind(plot_df_nonwhite, plot_df_white)
# --- figure 5
# Dot-and-whisker plot of the cosine estimates in plot_df, one dodged series per
# democracy-vector condition (country_race).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; `size` is kept here so the script behaves as before on older versions.
ggplot(
  data = plot_df,
  mapping = aes(x = est, y = type, group = country_race, color = country_race)
) +
  # reference line at zero similarity
  geom_vline(xintercept = 0, size = .8, color = "black") +
  # thin dark whisker: 95% interval
  geom_linerange(
    mapping = aes(xmin = ci_95_low, xmax = ci_95_high),
    position = position_dodge(width = .5),
    size = 1.3,
    color = "gray10"
  ) +
  # thicker whisker coloured by condition: 90% interval
  geom_linerange(
    mapping = aes(xmin = ci_90_low, xmax = ci_90_high),
    position = position_dodge(width = .5),
    size = 2.3
  ) +
  # larger black point underneath acts as an outline for the coloured point on top
  geom_point(position = position_dodge(width = .5), size = 5.5, color = "black") +
  geom_point(position = position_dodge(width = .5), size = 4.5) +
  # one colour per country_race level
  scale_color_manual(values = c("gray30", "#92978a", "#c5bec2")) +
  labs(x = "Cosine Estimate", y = NULL, color = "Democracy Vector:") +
  theme_minimal() +
  # NOTE(review): no layer maps `fill`, so this guide setting likely has no visible effect
  guides(fill = guide_legend(byrow = TRUE)) +
  theme(
    # text sizes
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 17, margin = margin(t = 15)),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    # legend keys
    legend.key = element_rect(color = NA, fill = NA),
    legend.key.size = unit(1, "cm"),
    # panel and margins
    axis.line.y.right = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.spacing.x = unit(1.5, "lines"),
    plot.margin = margin(30, 30, 30, 30)
  )

# --- table B14
# Each column of the table is a texreg object built from the matching rows of plot_df:
# the estimate plus its 95% interval bounds.
# The "Unexposed to Race" row is present twice in plot_df, so only the first is used.
t_B14_total <- with(
  subset(plot_df, country_race == "Unexposed to Race")[1, ],
  createTexreg(
    coef.names = "Total Similarity of Democracy",
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Unexposed to Race"
  )
)

t_B14_white <- with(
  subset(plot_df, country_race == "Averaged with White Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with White Vector"
  )
)

t_B14_nonwhite <- with(
  subset(plot_df, country_race == "Averaged with Nonwhite Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with Nonwhite Vector"
  )
)

# print the three columns side by side in the console
screenreg(list(t_B14_total, t_B14_white, t_B14_nonwhite))

# --- embedding analysis robustness check: 300 dimensional model, rather than 200D --- #
# here, we run the same analysis above but use the 300 dimensional GloVe vectors (rather than 200D)
# Peace concept vector in the 300D space: mean of the four peace-term vectors.
peace_vector_300 <- colMeans(glove_vectors_300d[c("peace", "harmony", "agreement", "diplomacy"), ])
# --- overall cosine similarity (akin to "total effect" of democracy on peace)
# Bootstrap over the democracy dictionary: each of the 1000 replicates resamples the
# democracy words with replacement, averages their 300D vectors, and takes the cosine
# similarity between that average and the fixed 300D peace vector.
set.seed(123)
dem_total_effect_resample <- replicate(1000, {
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  cosine(colMeans(glove_vectors_300d[democ_draw, ]), peace_vector_300)
})
# point estimate: mean over replicates, akin to the "total effect" i.e. ATE
dem_total_effect <- mean(dem_total_effect_resample)
# percentile intervals taken from the bootstrap distribution
total_ci_95 <- quantile(dem_total_effect_resample, probs = c(0.025, 0.975)) # 95% CI
total_ci_90 <- quantile(dem_total_effect_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine sim "fixing" nonwhite (akin to direct effect or "exposing" the democ vector to nonwhite)
# Each replicate resamples the democracy and the nonwhite dictionaries with replacement,
# averages the two 300D dictionary means element-wise, and takes the cosine similarity of
# that "exposed" democracy vector with the 300D peace vector.
set.seed(123)
dem_acde_nonwhite_resample <- replicate(1000, {
  # democracy words are drawn before nonwhite words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  nonwhite_draw <- sample(nonwhite_words, length(nonwhite_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors_300d[democ_draw, ]),
                                   colMeans(glove_vectors_300d[nonwhite_draw, ])))
  cosine(exposed_vector, peace_vector_300)
})
# point estimate, akin to the "direct effect" of democracy fixing nonwhite
dem_acde_nonwhite <- mean(dem_acde_nonwhite_resample)
acde_ci_95 <- quantile(dem_acde_nonwhite_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_nonwhite_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by nonwhite terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_nonwhite_resample <- dem_total_effect_resample - dem_acde_nonwhite_resample
dem_nonwhite_eliminated <- mean(eliminated_nonwhite_resample)
eliminated_ci_95 <- quantile(eliminated_nonwhite_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_nonwhite_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_nonwhite <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_nonwhite, dem_nonwhite_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with Nonwhite Vector", "Averaged with Nonwhite Vector"),
    levels = c("Unexposed to Race", "Averaged with Nonwhite Vector")
  )
)

# --- cosine sim "fixing" white (akin to direct effect or "exposing" the democ vector to white)
# Each replicate resamples the democracy and the white dictionaries with replacement,
# averages the two 300D dictionary means element-wise, and takes the cosine similarity of
# that "exposed" democracy vector with the 300D peace vector.
set.seed(123)
dem_acde_white_resample <- replicate(1000, {
  # democracy words are drawn before white words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  white_draw <- sample(white_words, length(white_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors_300d[democ_draw, ]),
                                   colMeans(glove_vectors_300d[white_draw, ])))
  cosine(exposed_vector, peace_vector_300)
})
# point estimate, akin to the "direct effect" of democracy fixing white
dem_acde_white <- mean(dem_acde_white_resample)
# the acde_ci_* and eliminated_ci_* names are reused (overwritten) for the white estimates
acde_ci_95 <- quantile(dem_acde_white_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_white_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by white terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_white_resample <- dem_total_effect_resample - dem_acde_white_resample
dem_white_eliminated <- mean(eliminated_white_resample)
eliminated_ci_95 <- quantile(eliminated_white_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_white_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_white <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_white, dem_white_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with White Vector", "Averaged with White Vector"),
    levels = c("Unexposed to Race", "Averaged with White Vector")
  )
)

# Stack nonwhite and white results; the "Unexposed to Race" total row appears in both.
plot_df <- rbind(plot_df_nonwhite, plot_df_white)
# --- figure A14
# Dot-and-whisker plot of the 300D cosine estimates in plot_df, one dodged series per
# democracy-vector condition (country_race).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; `size` is kept here so the script behaves as before on older versions.
ggplot(
  data = plot_df,
  mapping = aes(x = est, y = type, group = country_race, color = country_race)
) +
  # reference line at zero similarity
  geom_vline(xintercept = 0, size = .8, color = "black") +
  # thin dark whisker: 95% interval
  geom_linerange(
    mapping = aes(xmin = ci_95_low, xmax = ci_95_high),
    position = position_dodge(width = .5),
    size = 1.3,
    color = "gray10"
  ) +
  # thicker whisker coloured by condition: 90% interval
  geom_linerange(
    mapping = aes(xmin = ci_90_low, xmax = ci_90_high),
    position = position_dodge(width = .5),
    size = 2.3
  ) +
  # larger black point underneath acts as an outline for the coloured point on top
  geom_point(position = position_dodge(width = .5), size = 5.5, color = "black") +
  geom_point(position = position_dodge(width = .5), size = 4.5) +
  # one colour per country_race level
  scale_color_manual(values = c("gray30", "#92978a", "#c5bec2")) +
  labs(x = "Cosine Estimate", y = NULL, color = "Democracy Vector:") +
  theme_minimal() +
  # NOTE(review): no layer maps `fill`, so this guide setting likely has no visible effect
  guides(fill = guide_legend(byrow = TRUE)) +
  theme(
    # text sizes
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 17, margin = margin(t = 15)),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    # legend keys
    legend.key = element_rect(color = NA, fill = NA),
    legend.key.size = unit(1, "cm"),
    # panel and margins
    axis.line.y.right = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.spacing.x = unit(1.5, "lines"),
    plot.margin = margin(30, 30, 30, 30)
  )

# --- table B15
# Each column of the table is a texreg object built from the matching rows of plot_df:
# the estimate plus its 95% interval bounds.
# The "Unexposed to Race" row is present twice in plot_df, so only the first is used.
t_B15_total <- with(
  subset(plot_df, country_race == "Unexposed to Race")[1, ],
  createTexreg(
    coef.names = "Total Similarity of Democracy",
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Unexposed to Race"
  )
)

t_B15_white <- with(
  subset(plot_df, country_race == "Averaged with White Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with White Vector"
  )
)

t_B15_nonwhite <- with(
  subset(plot_df, country_race == "Averaged with Nonwhite Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with Nonwhite Vector"
  )
)

# print the three columns side by side in the console
screenreg(list(t_B15_total, t_B15_white, t_B15_nonwhite))

# --- embedding analysis robustness check: include dyadic terms --- #
# here, we run the same analysis above but incorporate dyadic terms into the dictionary to help clarify
# the role of joint democracy in dyadic understandings of the democratic peace
# --- overall cosine similarity (akin to "total effect" of democracy on peace)
# Bootstrap over the democracy dictionary: each of the 1000 replicates resamples the
# democracy words with replacement, averages their mean vector element-wise with the
# fixed dyadic vector, and takes the cosine similarity of the result with the peace vector.
set.seed(123)
dem_total_effect_resample <- replicate(1000, {
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  dyadic_democ_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                        dyadic_vector))
  cosine(dyadic_democ_vector, peace_vector)
})
# point estimate: mean over replicates, akin to the "total effect" i.e. ATE
dem_total_effect <- mean(dem_total_effect_resample)
# percentile intervals taken from the bootstrap distribution
total_ci_95 <- quantile(dem_total_effect_resample, probs = c(0.025, 0.975)) # 95% CI
total_ci_90 <- quantile(dem_total_effect_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine sim "fixing" nonwhite (akin to direct effect or "exposing" the democ vector to nonwhite)
# Each replicate resamples the democracy and the nonwhite dictionaries with replacement,
# averages the two dictionary means and the fixed dyadic vector element-wise (equal
# weights), and takes the cosine similarity of that vector with the peace vector.
set.seed(123)
dem_acde_nonwhite_resample <- replicate(1000, {
  # democracy words are drawn before nonwhite words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  nonwhite_draw <- sample(nonwhite_words, length(nonwhite_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                   colMeans(glove_vectors[nonwhite_draw, ]),
                                   dyadic_vector))
  cosine(exposed_vector, peace_vector)
})
# point estimate, akin to the "direct effect" of democracy fixing nonwhite
dem_acde_nonwhite <- mean(dem_acde_nonwhite_resample)
acde_ci_95 <- quantile(dem_acde_nonwhite_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_nonwhite_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by nonwhite terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_nonwhite_resample <- dem_total_effect_resample - dem_acde_nonwhite_resample
dem_nonwhite_eliminated <- mean(eliminated_nonwhite_resample)
eliminated_ci_95 <- quantile(eliminated_nonwhite_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_nonwhite_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_nonwhite <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_nonwhite, dem_nonwhite_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with Nonwhite Vector", "Averaged with Nonwhite Vector"),
    levels = c("Unexposed to Race", "Averaged with Nonwhite Vector")
  )
)

# --- cosine sim "fixing" white (akin to direct effect or "exposing" the democ vector to white)
# Each replicate resamples the democracy and the white dictionaries with replacement,
# averages the two dictionary means and the fixed dyadic vector element-wise (equal
# weights), and takes the cosine similarity of that vector with the peace vector.
set.seed(123)
dem_acde_white_resample <- replicate(1000, {
  # democracy words are drawn before white words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  white_draw <- sample(white_words, length(white_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                   colMeans(glove_vectors[white_draw, ]),
                                   dyadic_vector))
  cosine(exposed_vector, peace_vector)
})
# point estimate, akin to the "direct effect" of democracy fixing white
dem_acde_white <- mean(dem_acde_white_resample)
# the acde_ci_* and eliminated_ci_* names are reused (overwritten) for the white estimates
acde_ci_95 <- quantile(dem_acde_white_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_white_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by white terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_white_resample <- dem_total_effect_resample - dem_acde_white_resample
dem_white_eliminated <- mean(eliminated_white_resample)
eliminated_ci_95 <- quantile(eliminated_white_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_white_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_white <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_white, dem_white_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with White Vector", "Averaged with White Vector"),
    levels = c("Unexposed to Race", "Averaged with White Vector")
  )
)

# Stack nonwhite and white results; the "Unexposed to Race" total row appears in both.
plot_df <- rbind(plot_df_nonwhite, plot_df_white)
# --- figure A15
# Dot-and-whisker plot of the dyadic-dictionary cosine estimates in plot_df, one dodged
# series per democracy-vector condition (country_race).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; `size` is kept here so the script behaves as before on older versions.
ggplot(
  data = plot_df,
  mapping = aes(x = est, y = type, group = country_race, color = country_race)
) +
  # reference line at zero similarity
  geom_vline(xintercept = 0, size = .8, color = "black") +
  # thin dark whisker: 95% interval
  geom_linerange(
    mapping = aes(xmin = ci_95_low, xmax = ci_95_high),
    position = position_dodge(width = .5),
    size = 1.3,
    color = "gray10"
  ) +
  # thicker whisker coloured by condition: 90% interval
  geom_linerange(
    mapping = aes(xmin = ci_90_low, xmax = ci_90_high),
    position = position_dodge(width = .5),
    size = 2.3
  ) +
  # larger black point underneath acts as an outline for the coloured point on top
  geom_point(position = position_dodge(width = .5), size = 5.5, color = "black") +
  geom_point(position = position_dodge(width = .5), size = 4.5) +
  # one colour per country_race level
  scale_color_manual(values = c("gray30", "#92978a", "#c5bec2")) +
  labs(x = "Cosine Estimate", y = NULL, color = "Democracy Vector:") +
  theme_minimal() +
  # NOTE(review): no layer maps `fill`, so this guide setting likely has no visible effect
  guides(fill = guide_legend(byrow = TRUE)) +
  theme(
    # text sizes
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 17, margin = margin(t = 15)),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    # legend keys
    legend.key = element_rect(color = NA, fill = NA),
    legend.key.size = unit(1, "cm"),
    # panel and margins
    axis.line.y.right = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.spacing.x = unit(1.5, "lines"),
    plot.margin = margin(30, 30, 30, 30)
  )

# --- table B16
# Each column of the table is a texreg object built from the matching rows of plot_df:
# the estimate plus its 95% interval bounds.
# The "Unexposed to Race" row is present twice in plot_df, so only the first is used.
t_B16_total <- with(
  subset(plot_df, country_race == "Unexposed to Race")[1, ],
  createTexreg(
    coef.names = "Total Similarity of Democracy",
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Unexposed to Race"
  )
)

t_B16_white <- with(
  subset(plot_df, country_race == "Averaged with White Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with White Vector"
  )
)

t_B16_nonwhite <- with(
  subset(plot_df, country_race == "Averaged with Nonwhite Vector"),
  createTexreg(
    coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
    coef = est,
    ci.low = ci_95_low,
    ci.up = ci_95_high,
    model.name = "Averaged with Nonwhite Vector"
  )
)

# print the three columns side by side in the console
screenreg(list(t_B16_total, t_B16_white, t_B16_nonwhite))

# --- embedding analysis robustness check: war-peace dimension --- #
# here, we run the same analysis above but specify a peace-war dimension, as opposed to simply
# the cosine similarity between democracy and peace terms. 
# Peace-war dimension: element-wise difference between the peace and war concept vectors.
peace_war_dim <- peace_vector - war_vector
# --- overall cosine similarity (akin to "total effect" of democracy on the peace-war dimension)
# Bootstrap over the democracy dictionary: each of the 1000 replicates resamples the
# democracy words with replacement, averages their vectors, and takes the cosine
# similarity between that average and the peace-war dimension.
set.seed(123)
dem_total_effect_resample <- replicate(1000, {
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  cosine(colMeans(glove_vectors[democ_draw, ]), peace_war_dim)
})
# point estimate: mean over replicates, akin to the "total effect" i.e. ATE
dem_total_effect <- mean(dem_total_effect_resample)
# percentile intervals taken from the bootstrap distribution
total_ci_95 <- quantile(dem_total_effect_resample, probs = c(0.025, 0.975)) # 95% CI
total_ci_90 <- quantile(dem_total_effect_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine sim "fixing" nonwhite (akin to direct effect or "exposing" the democ vector to nonwhite)
# Each replicate resamples the democracy and the nonwhite dictionaries with replacement,
# averages the two dictionary means element-wise, and takes the cosine similarity of that
# "exposed" democracy vector with the peace-war dimension.
set.seed(123)
dem_acde_nonwhite_resample <- replicate(1000, {
  # democracy words are drawn before nonwhite words; this order fixes the RNG stream
  democ_draw <- sample(democ_words, length(democ_words), replace = TRUE)
  nonwhite_draw <- sample(nonwhite_words, length(nonwhite_words), replace = TRUE)
  exposed_vector <- rowMeans(cbind(colMeans(glove_vectors[democ_draw, ]),
                                   colMeans(glove_vectors[nonwhite_draw, ])))
  cosine(exposed_vector, peace_war_dim)
})
# point estimate, akin to the "direct effect" of democracy fixing nonwhite
dem_acde_nonwhite <- mean(dem_acde_nonwhite_resample)
acde_ci_95 <- quantile(dem_acde_nonwhite_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_nonwhite_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by nonwhite terms (akin to "eliminated effect")
# Replicate-wise difference between the total and the direct similarity.
eliminated_nonwhite_resample <- dem_total_effect_resample - dem_acde_nonwhite_resample
dem_nonwhite_eliminated <- mean(eliminated_nonwhite_resample)
eliminated_ci_95 <- quantile(eliminated_nonwhite_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(eliminated_nonwhite_resample, probs = c(0.05, 0.95))

# One row per estimand; `type` levels are reversed so the first estimand is the last level.
estimand_labels <- c("Total Similarity\nof Democracy",
                     "Direct Similarity of\nDemocracy Fixing Race",
                     "Similarity of Democracy\nEliminated by Race")
plot_df_nonwhite <- data.frame(
  type = factor(estimand_labels, levels = rev(estimand_labels)),
  est = c(dem_total_effect, dem_acde_nonwhite, dem_nonwhite_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  country_race = factor(
    c("Unexposed to Race", "Averaged with Nonwhite Vector", "Averaged with Nonwhite Vector"),
    levels = c("Unexposed to Race", "Averaged with Nonwhite Vector")
  )
)

# --- cosine sim "fixing" white (akin to direct effect or "exposing" the democ vector to white)
set.seed(123)
dem_acde_white_resample <- replicate(1000, cosine(rowMeans(cbind(colMeans(glove_vectors[sample(democ_words, length(democ_words), replace = T),]),
                                                                 colMeans(glove_vectors[sample(white_words, length(white_words), replace = T),]))),
                                                  peace_war_dim))
dem_acde_white <- mean(dem_acde_white_resample) # average cosine sim, akin to "direct effect" of democracy fixing white
acde_ci_95 <- quantile(dem_acde_white_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_white_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by white terms (akin to "eliminated effect")
dem_white_eliminated <- mean(dem_total_effect_resample-dem_acde_white_resample)
eliminated_ci_95 <- quantile(dem_total_effect_resample-dem_acde_white_resample, probs = c(0.025, 0.975))
eliminated_ci_90 <- quantile(dem_total_effect_resample-dem_acde_white_resample, probs = c(0.05, 0.95))

# --- collect the white-vector estimates into a plotting data frame
# one row per quantity (total, direct, eliminated), with point estimate and 95% / 90% interval bounds;
# the interval objects hold c(lower, upper), so [1] is the low bound and [2] the high bound
plot_df_white <- data.frame(
  # levels are listed in the reverse of the row order
  type = factor(
    c("Total Similarity\nof Democracy",
      "Direct Similarity of\nDemocracy Fixing Race",
      "Similarity of Democracy\nEliminated by Race"),
    levels = c("Similarity of Democracy\nEliminated by Race",
               "Direct Similarity of\nDemocracy Fixing Race",
               "Total Similarity\nof Democracy")
  ),
  est = c(dem_total_effect, dem_acde_white, dem_white_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  # row 1 is the total similarity (no racial terms); rows 2-3 use the democracy vector averaged with white terms
  country_race = factor(
    c("Unexposed to Race", rep("Averaged with White Vector", 2)),
    levels = c("Unexposed to Race", "Averaged with White Vector")
  )
)

# --- stack the nonwhite and white estimates for plotting
# the "Unexposed to Race" (total) row is present in both inputs, so it appears twice in plot_df
plot_df <- rbind(plot_df_nonwhite, plot_df_white)

# --- figure A16
# point estimates with 95% and 90% intervals, dodged and colored by the democracy vector used
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favor of `linewidth`; kept as `size` here
ggplot(
  data = plot_df,
  mapping = aes(x = est, y = type, color = country_race, group = country_race)
) +
  # reference line at a cosine estimate of zero
  geom_vline(xintercept = 0, color = "black", size = 0.8) +
  # thin dark bar: 95% interval
  geom_linerange(
    aes(xmin = ci_95_low, xmax = ci_95_high),
    position = position_dodge(width = 0.5),
    color = "gray10",
    size = 1.3
  ) +
  # thick colored bar: 90% interval
  geom_linerange(
    aes(xmin = ci_90_low, xmax = ci_90_high),
    position = position_dodge(width = 0.5),
    size = 2.3
  ) +
  # larger black point underneath a smaller colored point gives each estimate a black outline
  geom_point(position = position_dodge(width = 0.5), color = "black", size = 5.5) +
  geom_point(position = position_dodge(width = 0.5), size = 4.5) +
  scale_color_manual(values = c("gray30", "#92978a", "#c5bec2")) +
  labs(x = "Cosine Estimate", y = NULL, color = "Democracy Vector:") +
  theme_minimal() +
  # NOTE(review): no fill aesthetic is mapped in this plot, so this guide spec appears to have no effect
  guides(fill = guide_legend(byrow = TRUE)) +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 17, margin = margin(t = 15)),
    axis.line.y.right = element_blank(),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.key = element_rect(color = NA, fill = NA),
    legend.key.size = unit(1, "cm"),
    panel.grid.minor.x = element_blank(),
    panel.spacing.x = unit(1.5, "lines"),
    plot.margin = margin(t = 30, r = 30, b = 30, l = 30)
  )

# --- table B17
# tabulates the point estimates and 95% intervals plotted in figure A16, one column per democracy vector.
# createTexreg() and screenreg() are called through the texreg namespace so this section does not depend on
# the package having been attached earlier in the script.

# the total-similarity row appears once per stacked data frame in plot_df; keep only the first copy
b17_total <- subset(plot_df, country_race == "Unexposed to Race")[1, ]
# each of these holds two rows, in order: direct similarity, then eliminated similarity
b17_white <- subset(plot_df, country_race == "Averaged with White Vector")
b17_nonwhite <- subset(plot_df, country_race == "Averaged with Nonwhite Vector")

t_B17_total <- texreg::createTexreg(coef.names = "Total Similarity of Democracy",
                                    coef = b17_total$est,
                                    ci.low = b17_total$ci_95_low,
                                    ci.up = b17_total$ci_95_high,
                                    model.name = "Unexposed to Race")

t_B17_white <- texreg::createTexreg(coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
                                    coef = b17_white$est,
                                    ci.low = b17_white$ci_95_low,
                                    ci.up = b17_white$ci_95_high,
                                    model.name = "Averaged with White Vector")

t_B17_nonwhite <- texreg::createTexreg(coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
                                       coef = b17_nonwhite$est,
                                       ci.low = b17_nonwhite$ci_95_low,
                                       ci.up = b17_nonwhite$ci_95_high,
                                       model.name = "Averaged with Nonwhite Vector")

texreg::screenreg(list(t_B17_total, t_B17_white, t_B17_nonwhite))

# --- embedding analysis: nearest neighbors for key theoretical constructs --- #
# average embedding of each dictionary in the GloVe space
democ_vectors <- colMeans(glove_vectors[democ_words, ])
white_vectors <- colMeans(glove_vectors[white_words, ])
nonwhite_vectors <- colMeans(glove_vectors[nonwhite_words, ])

# 35 nearest neighbors of the democracy vector averaged with the nonwhite vector (printed via the outer parentheses)
# "non-white" is the 11th and "non-european" the 14th nearest neighbors, as mentioned in the appendix text
(nonwhite_dem_nns <- find_nns(
  target_embedding = rowMeans(cbind(democ_vectors, nonwhite_vectors)),
  pre_trained = glove_vectors,
  candidates = NULL,
  N = 35,
  norm = "l2",
  stem = FALSE
))

# 35 nearest neighbors of the democracy vector averaged with the white vector
# "western" is the 22nd and "european" the 35th nearest neighbors, as mentioned in the appendix text
(white_dem_nns <- find_nns(
  target_embedding = rowMeans(cbind(democ_vectors, white_vectors)),
  pre_trained = glove_vectors,
  candidates = NULL,
  N = 35,
  norm = "l2",
  stem = FALSE
))
# in short, racial terms are more salient in the nonwhite democracy vector than in the white democracy vector.

# --- embedding analysis robustness check: does the argument scale to elites? --- #
# here, we run the same analysis as above, but on speeches in the UK Parliament (1945-2000).
# although our paper focuses on public opinion, this analysis provides an initial assessment of our
# expectations at the elite level.
# the UK racial dictionaries use unhyphenated spellings and two terms each
nonwhite_words_uk <- c("nonwhite", "noneuropean")
white_words_uk <- c("white", "european")
peace_vector_uk <- colMeans(uk_vectors[c("peace", "harmony", "agreement", "diplomacy"), ])

# --- overall cosine similarity (akin to "total effect" of democracy on peace)
# cosine similarity between democracy terms and peace terms, resampling the democracy dictionary
# with replacement to produce uncertainty estimates
set.seed(123)
dem_total_effect_resample <- replicate(1000, {
  democ_draw <- colMeans(uk_vectors[sample(democ_words, length(democ_words), replace = TRUE), ])
  cosine(democ_draw, peace_vector_uk)
})
dem_total_effect <- mean(dem_total_effect_resample) # average cosine sim, akin to "total effect" i.e. ATE
total_ci_95 <- quantile(dem_total_effect_resample, probs = c(0.025, 0.975)) # 95% CI
total_ci_90 <- quantile(dem_total_effect_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine sim "fixing" nonwhite (akin to direct effect or "exposing" the democ vector to nonwhite)
# cosine similarity between mean(democracy terms, nonwhite terms) and peace terms, resampling the democracy
# and nonwhite dictionaries with replacement to produce uncertainty estimates
set.seed(123)
dem_acde_nonwhite_resample <- replicate(1000, {
  # draw order matters for reproducibility: democracy words first, then nonwhite words
  democ_draw <- colMeans(uk_vectors[sample(democ_words, length(democ_words), replace = TRUE), ])
  nonwhite_draw <- colMeans(uk_vectors[sample(nonwhite_words_uk, length(nonwhite_words_uk), replace = TRUE), ])
  cosine(rowMeans(cbind(democ_draw, nonwhite_draw)), peace_vector_uk)
})
dem_acde_nonwhite <- mean(dem_acde_nonwhite_resample) # average cosine sim, akin to "direct effect" of democracy fixing nonwhite
acde_ci_95 <- quantile(dem_acde_nonwhite_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_nonwhite_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by nonwhite terms (akin to "eliminated effect")
# element-wise difference between the total-similarity draws and the nonwhite-averaged draws
# NOTE(review): both resamples start from set.seed(123), but the total-similarity replicate calls sample() once
# per draw while the one above calls it twice, so the democracy resamples coincide only on the first draw and the
# subtraction is not pair-matched. left as-is so the estimates are unchanged.
dem_nonwhite_eliminated_resample <- dem_total_effect_resample - dem_acde_nonwhite_resample
dem_nonwhite_eliminated <- mean(dem_nonwhite_eliminated_resample)
eliminated_ci_95 <- quantile(dem_nonwhite_eliminated_resample, probs = c(0.025, 0.975)) # 95% CI
eliminated_ci_90 <- quantile(dem_nonwhite_eliminated_resample, probs = c(0.05, 0.95)) # 90% CI

# --- collect the UK nonwhite-vector estimates into a plotting data frame
# one row per quantity (total, direct, eliminated), with point estimate and 95% / 90% interval bounds;
# the interval objects hold c(lower, upper), so [1] is the low bound and [2] the high bound
plot_df_nonwhite <- data.frame(
  # levels are listed in the reverse of the row order
  type = factor(
    c("Total Similarity\nof Democracy",
      "Direct Similarity of\nDemocracy Fixing Race",
      "Similarity of Democracy\nEliminated by Race"),
    levels = c("Similarity of Democracy\nEliminated by Race",
               "Direct Similarity of\nDemocracy Fixing Race",
               "Total Similarity\nof Democracy")
  ),
  est = c(dem_total_effect, dem_acde_nonwhite, dem_nonwhite_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  # row 1 is the total similarity (no racial terms); rows 2-3 use the democracy vector averaged with nonwhite terms
  country_race = factor(
    c("Unexposed to Race", rep("Averaged with Nonwhite Vector", 2)),
    levels = c("Unexposed to Race", "Averaged with Nonwhite Vector")
  )
)

# --- cosine sim "fixing" white (akin to direct effect or "exposing" the democ vector to white)
# cosine similarity between mean(democracy terms, white terms) and peace terms, resampling the democracy
# and white dictionaries with replacement to produce uncertainty estimates
set.seed(123)
dem_acde_white_resample <- replicate(1000, {
  # draw order matters for reproducibility: democracy words first, then white words
  democ_draw <- colMeans(uk_vectors[sample(democ_words, length(democ_words), replace = TRUE), ])
  white_draw <- colMeans(uk_vectors[sample(white_words_uk, length(white_words_uk), replace = TRUE), ])
  cosine(rowMeans(cbind(democ_draw, white_draw)), peace_vector_uk)
})
dem_acde_white <- mean(dem_acde_white_resample) # average cosine sim, akin to "direct effect" of democracy fixing white
acde_ci_95 <- quantile(dem_acde_white_resample, probs = c(0.025, 0.975)) # 95% CI
acde_ci_90 <- quantile(dem_acde_white_resample, probs = c(0.05, 0.95)) # 90% CI

# --- cosine similarity eliminated by white terms (akin to "eliminated effect")
# element-wise difference between the total-similarity draws and the white-averaged draws
# NOTE(review): both resamples start from set.seed(123), but the total-similarity replicate calls sample() once
# per draw while the one above calls it twice, so the democracy resamples coincide only on the first draw and the
# subtraction is not pair-matched. left as-is so the estimates are unchanged.
dem_white_eliminated_resample <- dem_total_effect_resample - dem_acde_white_resample
dem_white_eliminated <- mean(dem_white_eliminated_resample)
eliminated_ci_95 <- quantile(dem_white_eliminated_resample, probs = c(0.025, 0.975)) # 95% CI
eliminated_ci_90 <- quantile(dem_white_eliminated_resample, probs = c(0.05, 0.95)) # 90% CI

# --- collect the UK white-vector estimates into a plotting data frame
# one row per quantity (total, direct, eliminated), with point estimate and 95% / 90% interval bounds;
# the interval objects hold c(lower, upper), so [1] is the low bound and [2] the high bound
plot_df_white <- data.frame(
  # levels are listed in the reverse of the row order
  type = factor(
    c("Total Similarity\nof Democracy",
      "Direct Similarity of\nDemocracy Fixing Race",
      "Similarity of Democracy\nEliminated by Race"),
    levels = c("Similarity of Democracy\nEliminated by Race",
               "Direct Similarity of\nDemocracy Fixing Race",
               "Total Similarity\nof Democracy")
  ),
  est = c(dem_total_effect, dem_acde_white, dem_white_eliminated),
  ci_95_high = c(total_ci_95[2], acde_ci_95[2], eliminated_ci_95[2]),
  ci_95_low = c(total_ci_95[1], acde_ci_95[1], eliminated_ci_95[1]),
  ci_90_high = c(total_ci_90[2], acde_ci_90[2], eliminated_ci_90[2]),
  ci_90_low = c(total_ci_90[1], acde_ci_90[1], eliminated_ci_90[1]),
  # row 1 is the total similarity (no racial terms); rows 2-3 use the democracy vector averaged with white terms
  country_race = factor(
    c("Unexposed to Race", rep("Averaged with White Vector", 2)),
    levels = c("Unexposed to Race", "Averaged with White Vector")
  )
)

# --- stack the UK nonwhite and white estimates for plotting
# the "Unexposed to Race" (total) row is present in both inputs, so it appears twice in plot_df
plot_df <- rbind(plot_df_nonwhite, plot_df_white)

# --- figure A17
# point estimates with 95% and 90% intervals, dodged and colored by the democracy vector used
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favor of `linewidth`; kept as `size` here
ggplot(
  data = plot_df,
  mapping = aes(x = est, y = type, color = country_race, group = country_race)
) +
  # reference line at a cosine estimate of zero
  geom_vline(xintercept = 0, color = "black", size = 0.8) +
  # thin dark bar: 95% interval
  geom_linerange(
    aes(xmin = ci_95_low, xmax = ci_95_high),
    position = position_dodge(width = 0.5),
    color = "gray10",
    size = 1.3
  ) +
  # thick colored bar: 90% interval
  geom_linerange(
    aes(xmin = ci_90_low, xmax = ci_90_high),
    position = position_dodge(width = 0.5),
    size = 2.3
  ) +
  # larger black point underneath a smaller colored point gives each estimate a black outline
  geom_point(position = position_dodge(width = 0.5), color = "black", size = 5.5) +
  geom_point(position = position_dodge(width = 0.5), size = 4.5) +
  scale_color_manual(values = c("gray30", "#92978a", "#c5bec2")) +
  labs(x = "Cosine Estimate", y = NULL, color = "Democracy Vector:") +
  theme_minimal() +
  # NOTE(review): no fill aesthetic is mapped in this plot, so this guide spec appears to have no effect
  guides(fill = guide_legend(byrow = TRUE)) +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 17, margin = margin(t = 15)),
    axis.line.y.right = element_blank(),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.key = element_rect(color = NA, fill = NA),
    legend.key.size = unit(1, "cm"),
    panel.grid.minor.x = element_blank(),
    panel.spacing.x = unit(1.5, "lines"),
    plot.margin = margin(t = 30, r = 30, b = 30, l = 30)
  )

# --- table B18
# tabulates the point estimates and 95% intervals plotted in figure A17, one column per democracy vector.
# createTexreg() and screenreg() are called through the texreg namespace so this section does not depend on
# the package having been attached earlier in the script.

# the total-similarity row appears once per stacked data frame in plot_df; keep only the first copy
b18_total <- subset(plot_df, country_race == "Unexposed to Race")[1, ]
# each of these holds two rows, in order: direct similarity, then eliminated similarity
b18_white <- subset(plot_df, country_race == "Averaged with White Vector")
b18_nonwhite <- subset(plot_df, country_race == "Averaged with Nonwhite Vector")

t_B18_total <- texreg::createTexreg(coef.names = "Total Similarity of Democracy",
                                    coef = b18_total$est,
                                    ci.low = b18_total$ci_95_low,
                                    ci.up = b18_total$ci_95_high,
                                    model.name = "Unexposed to Race")

t_B18_white <- texreg::createTexreg(coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
                                    coef = b18_white$est,
                                    ci.low = b18_white$ci_95_low,
                                    ci.up = b18_white$ci_95_high,
                                    model.name = "Averaged with White Vector")

t_B18_nonwhite <- texreg::createTexreg(coef.names = c("Direct Similarity of Democracy Fixing Race", "Similarity of Democracy Eliminated by Race"),
                                       coef = b18_nonwhite$est,
                                       ci.low = b18_nonwhite$ci_95_low,
                                       ci.up = b18_nonwhite$ci_95_high,
                                       model.name = "Averaged with Nonwhite Vector")

texreg::screenreg(list(t_B18_total, t_B18_white, t_B18_nonwhite))
